#!/usr/bin/env ruby

# Copyright (c) 2021-2024 Huawei Device Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

include_relative 'common.irt'

# Post-write barrier that does nothing: the generated function takes no
# parameters and returns immediately.
function(:EmptyPostWriteBarrier,
        params: {},
        regmap: $full_regmap,
        regalloc_set: RegMask.new,
        mode: [:FastPath]) {
    # For arm32 only an UNREACHABLE terminator is emitted; `next` leaves the
    # block before the regular body is built.
    if Options.arch == :arm32
        Intrinsic(:UNREACHABLE).Terminator.void
        next
    end
    ReturnVoid().void
}

def PostInterGenerationalBarrier(obj_num)
    if obj_num == 0
        params = {mem: 'ref_uint'}
        mask = RegMask.new($full_regmap, :arg0, :tmp0, :tmp1)
    elsif obj_num == 1
        params = {mem: 'ref_uint', offset: 'word', obj1: 'ref_uint'}
        mask = RegMask.new($full_regmap, :arg0, :arg1, :tmp0)
    elsif obj_num == 2
        params = {mem: 'ref_uint', offset: 'word', obj1: 'ref_uint', obj2: 'ref_uint'}
        mask = RegMask.new($full_regmap, :arg0, :arg1, :arg2)
    else
        raise "Wrong obj_num #{obj_num}"
    end

    function("PostInterGenerationalBarrier#{obj_num}".to_sym,
             params: params,
             regmap: $full_regmap,
             regalloc_set: mask,
             mode: [:FastPath]) {
        if Options.arch == :arm32
            Intrinsic(:UNREACHABLE).Terminator.void
            next
        end

        min_addr := LoadI(%tr).Imm(Constants::TLS_CARD_TABLE_MIN_ADDR_OFFSET).word
        cards := LoadI(%tr).Imm(Constants::TLS_CARD_TABLE_ADDR_OFFSET).ptr

        mem_word := Cast(mem).SrcType(Constants::REF_UINT).word
        card_offset := ShrI(Sub(mem_word, min_addr).word).Imm(Constants::CARD_TABLE_CARD_BITS).word
        card := Add(cards, card_offset).ptr
        StoreI(card, Constants::CARD_DIRTY_VALUE).Imm(Constants::CARD_VALUE_OFFSET).u8
        ReturnVoid().void
   }
end

# G1 PostWrite barrier
# - Checks if mem and obj are in different regions
# - Checks if GC Card for mem is not marked, then marks it
# - Pushes Card to G1 LockFreeBuffer concurrently, GC consumer thread will fetch the Card
def PostInterRegionBarrierMarkSingleFast()
    # Signature of the generated function and the registers it may allocate.
    params = {mem: 'ref_uint', offset: 'word', obj1: 'ref_uint'}
    mask = RegMask.new($full_regmap, :arg0, :arg1, :tmp0)

    function("PostInterRegionBarrierMarkSingleFast".to_sym,
             params: params,
             regmap: $full_regmap,
             regalloc_set: mask,
             mode: [:FastPath]) {
        # arm32 gets an UNREACHABLE terminator instead of a real body.
        if Options.arch == :arm32
            Intrinsic(:UNREACHABLE).Terminator.void
            next
        end

        # obj1 is 0: nothing to do, return.
        If(obj1, 0).EQ.Unlikely {
            Goto(:Done)
        }

        # mem and obj1 agree in every bit at or above REGION_SIZE_BIT
        # (the same-region case of the check described above): return.
        If(ShrI(Xor(mem, obj1).ref_uint).Imm(Constants::REGION_SIZE_BIT).ref_uint, 0).EQ.Likely {
            Goto(:Done)
        }

        # Otherwise place the address of the written slot (mem + offset) into arg0
        # and tail-call the slow-path entrypoint, which does the card handling.
        ref_addr := Add(mem, offset).ref_uint
        LiveOut(ref_addr).DstReg(regmap[:arg0]).ref_uint
        ep_offset = get_entrypoint_offset("POST_INTER_REGION_BARRIER_SLOW")
        Intrinsic(:TAIL_CALL).AddImm(ep_offset).MethodAsImm("PostInterRegionBarrierSlow").Terminator.void

    Label(:Done)
        ReturnVoid().void
    }
end

# G1 PostWrite barrier for StorePair instruction.
# Similar to the previous, but:
# - Checks if the 2nd object's card should be marked;
# - If not, checks the 1st object, marks its card if necessary and exits.
# - Otherwise, if 1st and 2nd object's cards are the same (i.e.
# -     2nd store addr isn't aligned on the beginning of a card), marks the card and exits.
# - Otherwise, marks the 2nd object's card as well as the 1st obj's card.
def PostInterRegionBarrierMarkPairFast()
    # Signature of the generated function and the registers it may allocate.
    params = {mem: 'ref_uint', offset: 'word', obj1: 'ref_uint', obj2: 'ref_uint'}
    mask = RegMask.new($full_regmap, :arg0, :arg1, :arg2)
    # x86_64 additionally gets :tmp0 in its allocation set.
    if Options.arch == :x86_64
        mask += :tmp0
    end

    function("PostInterRegionBarrierMarkPairFast".to_sym,
             params: params,
             regmap: $full_regmap,
             regalloc_set: mask,
             mode: [:FastPath]) {
        # arm32 gets an UNREACHABLE terminator instead of a real body.
        if Options.arch == :arm32
            Intrinsic(:UNREACHABLE).Terminator.void
            next
        end

        
        # Address of the 1st written slot.
        ref_addr_obj1 := Add(mem, offset).ref_uint
        
        # obj2 is 0: only the 1st store can require a barrier.
        If(obj2, 0).EQ.Unlikely {
            Goto(:Check1)
        }
        # Address of the 2nd written slot, REFERENCE_TYPE_SIZE past the 1st one.
        ref_addr_obj2 := AddI(ref_addr_obj1.ref_uint).Imm(Constants::REFERENCE_TYPE_SIZE).ref_uint

        # mem and obj2 differ in some bit at or above REGION_SIZE_BIT.
        If(ShrI(Xor(mem, obj2).ref_uint).Imm(Constants::REGION_SIZE_BIT).ref_uint, 0).NE.Unlikely {
            # The 2nd slot address has no CARD_ALIGNMENT_MASK bits set, i.e. the two
            # slots lie on different cards: pass the 2nd slot address in arg0 to the
            # two-cards slow path, which also handles the preceding slot.
            If(AndI(ref_addr_obj2).Imm(Constants::CARD_ALIGNMENT_MASK).ref_uint, 0).EQ.Unlikely {
                LiveOut(ref_addr_obj2).DstReg(regmap[:arg0]).ref_uint
                ep_offset = get_entrypoint_offset("POST_INTER_REGION_BARRIER_TWO_CARDS_SLOW")
                Intrinsic(:TAIL_CALL).AddImm(ep_offset).MethodAsImm("PostInterRegionBarrierTwoCardsSlow").Terminator.void
            }
            # No need to sub REFERENCE_TYPE_SIZE as the card will be the same:
            Goto(:Call1)
        }
    
    Label(:Check1)
        # obj1 is 0: nothing to do, return.
        If(obj1, 0).EQ.Unlikely {
            Goto(:Done)
        }
    
        # mem and obj1 agree in every bit at or above REGION_SIZE_BIT: return.
        If(ShrI(Xor(mem, obj1).ref_uint).Imm(Constants::REGION_SIZE_BIT).ref_uint, 0).EQ.Likely {
            Goto(:Done)
        }

    Label(:Call1)
        # Merge of the two ways to get here: the 2nd slot address when arriving via
        # Goto(:Call1), the 1st slot address when falling through from :Check1.
        # NOTE(review): assumes Phi inputs are listed in predecessor order - confirm.
        phi := Phi(ref_addr_obj2, ref_addr_obj1).ref_uint
        LiveOut(phi).DstReg(regmap[:arg0]).ref_uint
        ep_offset = get_entrypoint_offset("POST_INTER_REGION_BARRIER_SLOW")
        Intrinsic(:TAIL_CALL).AddImm(ep_offset).MethodAsImm("PostInterRegionBarrierSlow").Terminator.void

    Label(:Done)
        ReturnVoid().void
    }
end

# Marks the card covering `mem` and appends a pointer to that card to the
# buffer found at MANAGED_THREAD_G1_POST_BARRIER_BUFFER_OFFSET in the thread register.
# The macro falls through to its own :Done label without doing anything more when
# the card is young, is not in the clear state, or has the hot flag set.
#
# cards    - base pointer of the card table
# min_addr - minimal address covered by the card table
# mem      - address whose card has to be processed
scoped_macro(:push_dirty_card_to_buffer) do |cards, min_addr, mem|
    # card = cards + ((mem - min_addr) >> CARD_TABLE_CARD_BITS)
    mem_word := Cast(mem).SrcType(Constants::REF_UINT).word
    card_offset := ShrI(Sub(mem_word, min_addr).word).Imm(Constants::CARD_TABLE_CARD_BITS).word
    card := Add(cards, card_offset).ptr
    card_value := LoadI(card).Imm(Constants::CARD_VALUE_OFFSET).u8

    # it is expected that young card cannot be enqueued
    # so we can avoid card_value & CARD_STATUS_MASK operation
    is_card_young := Compare(card_value, Constants::CARD_YOUNG_VALUE).EQ.b
    IfImm(is_card_young).Imm(0).NE.b {
        Goto(:Done)
    }

    # StoreLoad barrier is required to guarantee order of previous reference store and card load
    Intrinsic(:DATA_MEMORY_BARRIER_FULL).void
    # Reload the card after the barrier; from here on card_value names this second load.
    card_value := LoadI(card).Imm(Constants::CARD_VALUE_OFFSET).u8
    card_status := AndI(card_value).Imm(Constants::CARD_STATUS_MASK).u8

    # Any status other than CARD_CLEAR_VALUE: leave the card as it is.
    is_card_clear := Compare(card_status, Constants::CARD_CLEAR_VALUE).EQ.b
    IfImm(is_card_clear).Imm(0).EQ.Unlikely.b {
        Goto(:Done)
    }
    # Set the CARD_MARKED_VALUE bits in the card byte atomically.
    Intrinsic(:ATOMIC_BYTE_OR, card, Constants::CARD_MARKED_VALUE).void

    # Cards carrying CARD_HOT_FLAG (in the reloaded value) are marked but not enqueued.
    hot_bit := AndI(card_value).Imm(Constants::CARD_HOT_FLAG).u8
    If(hot_bit, Constants::CARD_HOT_FLAG).EQ.b {
        Goto(:Done)
    }

    # Locate the buffer, its data area and the slot at the current tail index;
    # the next tail index wraps around using G1_LOCK_BUFFER_SIZE_MASK.
    buffer := LoadI(%tr).Imm(Constants::MANAGED_THREAD_G1_POST_BARRIER_BUFFER_OFFSET).ptr
    buffer_data := AddI(buffer).Imm(Constants::G1_LOCK_BUFFER_DATA_OFFSET).ptr
    tail_index := LoadI(buffer).Imm(Constants::G1_LOCK_BUFFER_TAIL_OFFSET).word
    tail_offset := ShlI(tail_index).Imm(Constants::POINTER_LOG_SIZE).word
    next_tail_index := AndI(AddI(tail_index).Imm(1).word).Imm(Constants::G1_LOCK_BUFFER_SIZE_MASK).word

    # Spin, re-reading head with a volatile load, while advancing the tail would
    # make it equal to head (presumably the buffer-full condition - the consumer
    # has to advance head first).
    Label(:TryPushLoop)
    head_index := LoadI(buffer).Imm(Constants::G1_LOCK_BUFFER_HEAD_OFFSET).Volatile.word
    If(next_tail_index, head_index).EQ {
        Goto(:TryPushLoop)
    }

    # Write the card pointer into the tail slot first, then store the new tail
    # index with a volatile store.
    Store(buffer_data, tail_offset, card).ptr
    StoreI(buffer, next_tail_index).Imm(Constants::G1_LOCK_BUFFER_TAIL_OFFSET).Volatile.word

    Label(:Done)
end

def PostInterRegionBarrierSlow()
    if Options.arch == :arm64
        mask = RegMask.new($full_regmap, :arg0, :callee0, :callee1, :callee2, :tmp0, :tmp1)
    elsif Options.arch == :x86_64
        mask = RegMask.new($full_regmap, :arg0, :callee0, :caller0, :caller3, :tmp0, :tmp1)
    else
        mask = $panda_mask
    end

    function(:PostInterRegionBarrierSlow,
            params: {mem: 'ref_uint'},
            regmap: $full_regmap,
            regalloc_set: mask,
            mode: [:FastPath]) {
        if Options.arch == :arm32
            Intrinsic(:UNREACHABLE).Terminator.void
            next
        end

        min_addr := LoadI(%tr).Imm(Constants::TLS_CARD_TABLE_MIN_ADDR_OFFSET).word
        cards := LoadI(%tr).Imm(Constants::TLS_CARD_TABLE_ADDR_OFFSET).ptr
        push_dirty_card_to_buffer(cards, min_addr, mem)
        ReturnVoid().void
    }
end

def PostInterRegionBarrierTwoCardsSlow()
    if Options.arch == :arm64
        mask = RegMask.new($full_regmap, :arg0, :callee0, :callee1, :callee2, :tmp0, :tmp1)
    elsif Options.arch == :x86_64
        mask = RegMask.new($full_regmap, :arg0, :callee0, :caller0, :caller3, :tmp0, :tmp1)
    else
        mask = $panda_mask
    end

    function(:PostInterRegionBarrierTwoCardsSlow,
            params: {mem: 'ref_uint'},
            regmap: $full_regmap,
            regalloc_set: mask,
            mode: [:FastPath]) {
        if Options.arch == :arm32
            Intrinsic(:UNREACHABLE).Terminator.void
            next
        end

        min_addr := LoadI(%tr).Imm(Constants::TLS_CARD_TABLE_MIN_ADDR_OFFSET).word
        cards := LoadI(%tr).Imm(Constants::TLS_CARD_TABLE_ADDR_OFFSET).ptr
        # `mem` here is a pointer to the 2nd slot of the pair
        push_dirty_card_to_buffer(cards, min_addr, mem)
        push_dirty_card_to_buffer(cards, min_addr, SubI(mem).Imm(Constants::REFERENCE_TYPE_SIZE).ref_uint)
        ReturnVoid().void
    }
end

# Emit all barrier functions: the G1 slow paths, then the generational
# barriers, then the G1 fast paths.
PostInterRegionBarrierSlow()
PostInterRegionBarrierTwoCardsSlow()

# One generational barrier per supported obj_num (0, 1 and 2), in ascending order.
(0..2).each { |obj_num| PostInterGenerationalBarrier(obj_num) }

PostInterRegionBarrierMarkSingleFast()
PostInterRegionBarrierMarkPairFast()
